library("tidyr")
library("tidyverse")
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v dplyr   1.0.2
## v tibble  3.0.4     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0
## v purrr   0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library("dplyr")
library("ggplot2")
library("readr")
library("rstatix")
## 
## Attaching package: 'rstatix'
## The following object is masked from 'package:stats':
## 
##     filter
library("pastecs")
## 
## Attaching package: 'pastecs'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## The following object is masked from 'package:tidyr':
## 
##     extract
library("readr")

1 Uygulama II: Lego Veri Seti İle İlgili Uygulamalar

Lego veri seti üzerinde veri temizleme, veri düzenleme, gerekli görülen fonksiyonların veri seti üzerinde kullanılması, tanımsal istatistiklerin elde edilmesi ve görselleştirme uygulanmıştır.

Veri seti Kaggle üzerinden https://www.kaggle.com/rtatman/lego-database bağlantısı ile alınmıştır.

# Load the four Lego tables from CSV.
# The data directory is named once so the script can be pointed at another
# copy of the files by editing a single line.
lego_dir <- "C:/Users/oztur/OneDrive/Masaüstü/Lego"

colours <- read.csv(file.path(lego_dir, "colors.csv"))
inventoryParts <- read.csv(file.path(lego_dir, "inventory_parts.csv"))
inventories <- read.csv(file.path(lego_dir, "inventories.csv"))
sets <- read.csv(file.path(lego_dir, "sets.csv"))

2 Veri Setinin Birleştirilmesi

# Build one wide table by chaining three inner joins:
# sets -> inventories -> inventory parts -> colours.
# Non-key columns that share a name receive the suffixes given for that join
# (e.g. nameX / name.col, which later steps select).
lego <- sets %>%
  inner_join(
    inventories,
    by = "set_num",
    suffix = c(".set", ".inv")
  ) %>%
  inner_join(
    inventoryParts,
    by = c("id" = "inventory_id"),
    suffix = c("X", ".invPart")
  ) %>%
  inner_join(
    colours,
    by = c("color_id" = "id"),
    suffix = c("X", ".col")
  )
lego

3 Dplyr Fonksiyonu ile işlemler

3.1 Select

Lego verisinden nameX, year, name.col ve rgb değişkenlerini getir.

# Keep only the nameX, year, name.col and rgb columns.
lego %>%
  select(nameX, year, name.col, rgb)

3.2 Filter

Sadece nameX değişkeni "Weetabix Promotional House 1" olan gözlemleri getir.

# Rows whose nameX equals "Weetabix Promotional House 1".
lego %>%
  filter(nameX %in% "Weetabix Promotional House 1")

3.3 Arrange

Lego verisini color_id değişkenine göre küçükten büyüğe sırala.

# Sort the rows by color_id in ascending order.
lego %>%
  arrange(color_id)

3.4 Summarise

Lego veri setinin gözlem sayısını ve nameX, name.col ve is_trans değişkenlerinin farklı değer sayılarını bulunuz.

# One-row summary: total number of rows plus the number of distinct values
# found in nameX, name.col and is_trans.
lego %>%
  summarise(
    Gozlem_Sayisi = n(),
    nameX_Destinasyonlari = n_distinct(nameX),
    name.col_Destinasyonlari = n_distinct(name.col),
    is_trans_Destinasyonlari = n_distinct(is_trans)
  )

3.5 Group By

nameX değişkeninin kategorilere göre dağılımı

# Number of rows for each nameX value, reported in the Adet column.
group_by(lego, nameX) %>%
  summarise(Adet = n())
## `summarise()` ungrouping output (override with `.groups` argument)

3.6 Pipe Operatörü

3.6.1 Pipe Operatörü ile Verinin Yıllara ve Renk Türüne Göre Gruplandırılması

3.6.1.1 Verinin Ayrılma İşlemi

# Average colour profile per decade.
#   decade               - first three characters of `year` followed by "0s"
#   red / green / blue   - mean channel value, divided by 2.55 (0-255 -> 0-100)
#   saturation/lightness - mean HSV `s` / `v` component, multiplied by 100
# `lightness` holds the HSV value (v) component. Every summary column is
# rounded to a whole number.
yearColours <- lego %>%
  mutate(decade = paste0(substring(year, 1, 3), "0s")) %>%
  # col2rgb() yields a matrix with one row per channel; transposing it turns
  # the channels into columns that mutate() adds to the table.
  mutate(data.frame(t(col2rgb(paste0("#", rgb))))) %>%
  # rgb2hsv() expects its channels in r, g, b order. Passing blue before green
  # swaps the two channels and yields a wrong hue.
  mutate(data.frame(t(rgb2hsv(red, green, blue)))) %>%
  select(decade, red, green, blue, hue = h, saturation = s, lightness = v) %>%
  group_by(decade) %>%
  summarise(
    red = mean(red) / 2.55,
    green = mean(green) / 2.55,
    blue = mean(blue) / 2.55,
    saturation = mean(saturation) * 100,
    lightness = mean(lightness) * 100
  ) %>%
  mutate(across(c(red, green, blue, saturation, lightness), round))
## `summarise()` ungrouping output (override with `.groups` argument)
yearColours

3.6.1.2 Ayrılan Verinin Grafiği

# Reshape to long form: the five summary columns are stacked, with the column
# name stored in `property` and the number in the default `value` column.
yearColoursPvt <- yearColours %>%
  pivot_longer(
    c("red", "green", "blue", "saturation", "lightness"),
    names_to = "property"
  )

# Draw one line (with point markers) per property across the decades.
yearColoursPvt %>%
  ggplot(aes(
    x = decade, y = value, group = property,
    colour = property, linetype = property, shape = property
  )) +
  geom_point(size = 3) +
  geom_line(size = 1.5) +
  theme_light() +
  theme(
    axis.title.x = element_blank(),
    plot.title = element_text(hjust = .5)
  ) +
  labs(title = "Yıllara Göre Lego Renkleri", y = "Yoğunluk") +
  # Spell out FALSE: `F` is an ordinary variable that can be reassigned.
  coord_cartesian(ylim = c(0, 100), expand = FALSE)

3.6.2 Pipe Operatörü ile 1950 - 2017 Yılları Arasında Legolardaki Renklerin Dağılımı

# Prefix every colour code with "#".
# Any leading "#" already present is stripped first, so running this step a
# second time does not produce a doubled "##" prefix.
colours <- colours %>%
  mutate(rgb = paste0("#", sub("^#", "", rgb)))

# Total part quantity for every (rgb, year) pair.
# Rows with a missing year, rgb or quantity are removed before summing.
# The result is still grouped by rgb after summarise().
brick_colours <- sets %>%
  inner_join(
    inventories,
    by = "set_num",
    suffix = c(".set", ".inv")
  ) %>%
  inner_join(
    inventoryParts,
    by = c("id" = "inventory_id"),
    suffix = c("X", ".invPart")
  ) %>%
  inner_join(
    colours,
    by = c("color_id" = "id"),
    suffix = c("X", ".col")
  ) %>%
  select(year, rgb, quantity) %>%
  na.omit() %>%
  group_by(rgb, year) %>%
  summarise(total = sum(quantity))
## `summarise()` regrouping output by 'rgb' (override with `.groups` argument)
# Palette in which every colour code is named after itself, so a manual fill
# scale maps each rgb value to that same colour.
pal <- setNames(colours$rgb, colours$rgb)

# Axis tick positions: every tenth year starting at 1950, not exceeding 2017.
breaks <- seq(from = 1950, to = 2017, by = 10)
 
# Stacked bar chart with one bar per year; each segment is filled with the
# colour code it stands for (via `pal`), and the legend is hidden.
# NOTE(review): geom_bar() counts rows, so a bar's height is the number of
# rgb values present in that year; the `total` column is not used. Confirm
# this is intended (otherwise map `weight = total`).
brick_colours %>%
  ggplot(aes(x = year, fill = rgb)) +
  geom_bar() +
  labs(x = "", y = "") +
  ggtitle("1950 - 2017 Yılları Arasında Legolardaki Renklerin Dağılımı") +
  scale_fill_manual(values = pal) +
  # `year` is numeric, so the decade ticks belong on a continuous scale.
  # Numeric limits passed to scale_x_discrete() trigger a ggplot2 warning.
  scale_x_continuous(breaks = breaks) +
  theme_light() +
  theme(
    panel.background = element_rect(fill = "#f0f0f0"),
    plot.background = element_rect(fill = "#f8f8f8"),
    legend.position = "none",
    text = element_text(size = 13),
    plot.title = element_text(size = rel(1)),
    axis.text.y = element_blank(),
    panel.grid = element_blank()
  )